pacman::p_load(
  rio, # import funcs
  sf, # work with spatial data
  here, # create relative paths
  janitor, # data cleaning
  lubridate, # date handling
  tidyverse # data science
)
conflicted::conflict_prefer("select", "dplyr")
conflicted::conflict_prefer("filter", "dplyr")
# linelist
dat_raw <- import(here::here("data", "final", "msf_linelist_moissala_2023-06-11.xlsx")) |> as.tibble()

# lab data
lab_raw <- import(here::here("data", "final", "msf_laboratory_moissala_2023-06-11.xlsx"))

# admin data
admin_1 <- st_read(here::here("data", "gpkg", "GEO-EXPORT-TCD-2024-04-11.gpkg"), layer = "ADM1")
## Reading layer `ADM1' from data source 
##   `/Users/hugzsoubrier/GitHub/fake-data/data/gpkg/GEO-EXPORT-TCD-2024-04-11.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 23 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 13.47348 ymin: 7.441069 xmax: 24.00269 ymax: 23.45037
## Geodetic CRS:  WGS 84
admin_2 <- st_read(here::here("data", "gpkg", "GEO-EXPORT-TCD-2024-04-11.gpkg"), layer = "ADM2")
## Reading layer `ADM2' from data source 
##   `/Users/hugzsoubrier/GitHub/fake-data/data/gpkg/GEO-EXPORT-TCD-2024-04-11.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 132 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 13.47348 ymin: 7.441069 xmax: 24.00269 ymax: 23.45037
## Geodetic CRS:  WGS 84

Introduction

This document provides directions for the analysis of the fake Measles dataset msf_linelist_moissala_2023-09-24.xlsx and the its corresponding laboratory dataset msf_laboratory_moissala_2023-09-24.xlsx.

Data Cleaning

Age classification

For measles outbreaks, it makes sense to use the following age classification:

  • 0 - 11 months
  • < 6 months
  • 6 - 8 months
  • 9 - 11 months
  • 1 - 4 years
  • 5 - 14 years
  • 15+ years

Middle Upper Arm Circumferences (MUAC)

MUAC is classified as follow:

  • Green (125+ mm)
  • Yellow (115 - 124 mm)
  • Red (<115 mm)

Epidemiological classification

confirmed: cases with a positive PCR result in lab data probable: cases with fever, coughand a rash suspected: all other cases

dat <- dat_raw |>
  
  # standardise variable names
  janitor::clean_names() |>
  
  # manually rename
  rename(
    id = epi_id_number,
    sex = sex_patient,
    age_unit = age_units_months_years,
    date_onset = date_of_onset_of_symptoms,
    hospitalisation = hospitalisation_yes_no,
    date_admission = date_of_admission_in_structure,
    date_outcome = death_outcome_death_recovered_lama,
    sub_prefecture = sub_prefecture_of_residence,
    region = region_of_residence,
    fever = participant_had_fever,
    rash = participant_had_rash,
    cough = participant_had_cough,
    red_eye = participant_had_red_eye,
    pneumonia = participant_had_pneumonia,
    encephalitis = participant_had_encephalitis,
    muac = middle_upper_arm_circumference_muac,
    vacc_status = vaccination_status,
    vacc_doses = vaccination_dosage,
    outcome = patient_outcome,
    site = msf_site,
    malaria_rdt = malaria_rdt
  ) |>
  
  # recoding
  mutate(
    sex = case_when(
      sex %in% c("f", "femme") ~ "female",
      sex %in% c("m", "homme") ~ "male",
      .default = sex
    ),
    across(contains("date_"), ~ ymd(.x)),
    across(c(fever, rash, cough, red_eye, pneumonia, encephalitis), ~ case_match(.x, "Yes" ~ TRUE, "No" ~ FALSE, .default = NA))
  ) |>
  
  # Categorise variables
  mutate(
    age_group = case_when(
      age_unit == "months" & age < 6 ~ "< 6 months",
      age_unit == "months" & between(age, 6, 8) ~ "6 - 8 months",
      age_unit == "months" & between(age, 9, 11) ~ "9 - 11 months",
      age_unit == "years" & between(age, 1, 4) ~ "1 - 4 years",
      age_unit == "years" & between(age, 5, 14) ~ "5 - 14 years",
      age_unit == "years" & between(age, 15, 40) ~ "15+ years"
    ),
    age_group = fct_relevel(
      age_group,
      c(
        "< 6 months",
        "6 - 8 months",
        "9 - 11 months",
        "1 - 4 years",
        "5 - 14 years",
        "15+ years"
      )
    ),
    muac_cat = case_when(
      muac >= 125 ~ "Green (125+ mm)",
      between(muac, 115, 124) ~ "Yellow (115 - 124 mm)",
      muac < 115 ~ "Red (<115 mm)"
    )
  ) |>
  relocate(
    age_group,
    .after = age_unit
  ) |>
  relocate(muac_cat, .after = muac) |> 
  
  filter(date_onset >= "2022-01-01")

Laboratory data

lab_clean <- lab_raw |>
  clean_names() |>
  rename(
    case_id = msf_number_id,
    lab_id = laboratory_id,
    date_test = date_of_the_test,
    test_result = final_test_result
  ) |>
  mutate(
    date_test = ymd(date_test),
    ct_value = round(ct_value, digits = 1)
  )

There are some duplicates in the laboratory results. Some case_id were tested multiple times if there was a inconclusive test_result. We need to find them, and take the last sample tested

reactable::reactable(lab_clean |> get_dupes(case_id))
lab_clean <- lab_clean |>
  filter(
    .by = case_id,
    date_test == max(date_test, na.rm = TRUE)
  )

Some samples were negative, so these cases are not cases and need to be removed from analysis

lab_clean |> count(test_result)
##   test_result   n
## 1    negative  62
## 2    positive 481

We join the lab_clean to the main linelists using the case_id as key. Then remove the negative case, and create an epidemiological classification

dat <- left_join(dat, lab_clean, by = c("id" = "case_id"))

dat <- dat |>
  filter(is.na(test_result) | test_result == "positive") |>
  mutate(
    epi_cat = case_when(
      test_result == "positive" ~ "confirmed",
      rash == TRUE & fever == TRUE & cough == TRUE ~ "probable",
      .default = "suspected"
    ),
    epi_cat = fct_relevel(epi_cat, c("confirmed", "probable", "suspected"))
  )

reactable::reactable(dat |> tabyl(epi_cat) |> mutate(percent = round(percent * 100, digits = 2)))

Person

Demographics

dat |>
  select(
    sex,
    age_group,
    muac_cat,
    vacc_status
  ) |>
  gtsummary::tbl_summary(label = list(
    sex ~ "Gender",
    age_group ~ "Age groups",
    muac_cat ~ "MUAC category",
    vacc_status = "Vaccination status",
    malaria_rdt = "Malaria RDT",
    outcome = "Outcome"
  ))
Characteristic N = 2,7881
Gender
    female 1,428 (51%)
    male 1,360 (49%)
Age groups
    < 6 months 145 (5.4%)
    6 - 8 months 302 (11%)
    9 - 11 months 276 (10%)
    1 - 4 years 1,274 (47%)
    5 - 14 years 528 (20%)
    15+ years 167 (6.2%)
    Unknown 96
MUAC category
    Green (125+ mm) 2,038 (73%)
    Red (<115 mm) 254 (9.1%)
    Yellow (115 - 124 mm) 496 (18%)
Vaccination status
    No 1,503 (64%)
    Uncertain 440 (19%)
    Yes - card 22 (0.9%)
    Yes - oral 395 (17%)
    Unknown 428
1 n (%)

By sites

dat |>
  select(
    sex,
    age_group,
    muac_cat,
    vacc_status,
    site
  ) |>
  gtsummary::tbl_summary(
    by = site,
    label = list(
      sex ~ "Gender",
      age_group ~ "Age groups",
      muac_cat ~ "MUAC category",
      vacc_status = "Vaccination status",
      malaria_rdt = "Malaria RDT",
      outcome = "Outcome"
    )
  )
Characteristic Bedaya Hospital, N = 4321 Bekourou Hospital, N = 1581 Bouna Hospital, N = 5141 Danamadji Hospital, N = 1481 Koumogo Hospital, N = 51 Moïssala Hospital, N = 1,5311
Gender





    female 215 (50%) 81 (51%) 251 (49%) 76 (51%) 2 (40%) 803 (52%)
    male 217 (50%) 77 (49%) 263 (51%) 72 (49%) 3 (60%) 728 (48%)
Age groups





    < 6 months 23 (5.5%) 9 (5.8%) 23 (4.6%) 9 (6.4%) 0 (0%) 81 (5.5%)
    6 - 8 months 58 (14%) 18 (12%) 54 (11%) 15 (11%) 0 (0%) 157 (11%)
    9 - 11 months 42 (10%) 16 (10%) 59 (12%) 17 (12%) 0 (0%) 142 (9.6%)
    1 - 4 years 185 (44%) 78 (51%) 234 (47%) 62 (44%) 3 (60%) 712 (48%)
    5 - 14 years 85 (20%) 23 (15%) 89 (18%) 32 (23%) 2 (40%) 297 (20%)
    15+ years 23 (5.5%) 10 (6.5%) 37 (7.5%) 6 (4.3%) 0 (0%) 91 (6.1%)
    Unknown 16 4 18 7 0 51
MUAC category





    Green (125+ mm) 329 (76%) 113 (72%) 381 (74%) 101 (68%) 5 (100%) 1,109 (72%)
    Red (<115 mm) 35 (8.1%) 16 (10%) 42 (8.2%) 16 (11%) 0 (0%) 145 (9.5%)
    Yellow (115 - 124 mm) 68 (16%) 29 (18%) 91 (18%) 31 (21%) 0 (0%) 277 (18%)
Vaccination status





    No 241 (65%) 92 (67%) 272 (63%) 77 (63%) 1 (50%) 820 (63%)
    Uncertain 55 (15%) 18 (13%) 95 (22%) 24 (20%) 1 (50%) 247 (19%)
    Yes - card 6 (1.6%) 2 (1.4%) 3 (0.7%) 2 (1.6%) 0 (0%) 9 (0.7%)
    Yes - oral 67 (18%) 26 (19%) 61 (14%) 20 (16%) 0 (0%) 221 (17%)
    Unknown 63 20 83 25 3 234
1 n (%)

Age Pyramids

dat |>
  select(
    sex,
    age_group,
    site
  ) |>
  apyramid::age_pyramid(
    age_group = "age_group",
    split_by = "sex",
    proportional = TRUE,
    show_midpoint = TRUE
  ) +

  theme_minimal()

CFR analysis by site

# CFR only on known outcomes
dat |>
  summarise(
    .by = site,
    n_cases = n(),
    n_confirmed = sum(epi_cat == "confirmed"),
    n_deaths = sum(outcome == "dead", na.rm = TRUE),
    cfr = round(digits = 2, n_deaths / sum(outcome %in% c("recovered", "dead")) * 100)
  ) |>
  reactable::reactable(columns = list(
    n_cases = reactable::colDef(name = "N cases"),
    n_confirmed = reactable::colDef(name = "N confirmed"),
    n_deaths = reactable::colDef(name = "N deaths"),
    cfr = reactable::colDef(name = "CFR (%)")
  ))

Risks Factor analysis

Investigating age_group, muac_cat and vacc_status as risks factors for death

# Prepare the data for fitting the logistic regression
prep_logit <- dat |>
  # change group order for references
  mutate(
    age_group = fct_relevel(
      age_group,
      c(
        "15+ years",
        "< 6 months",
        "6 - 8 months",
        "9 - 11 months",
        "1 - 4 years",
        "5 - 14 years"
      )
    ),
    muac_cat = fct_relevel(
      muac_cat,
      c(
        "Green (125+ mm)",
        "Yellow (115 - 124 mm)",
        "Red (<115 mm)"
      )
    ),
    vacc_status = case_match(
      vacc_status,
      "Yes - card" ~ "Yes",
      "Yes - oral" ~ "Yes",
      "Uncertain" ~ NA,
      .default = vacc_status
    ),
    vacc_status = fct_relevel(
      vacc_status,
      c(
        "No",
        "Yes"
      )
    ),

    # outcome needs to be 1/0
    outcome_binary = case_when(
      outcome == "recovered" ~ 0,
      outcome == "dead" ~ 1,
      .default = NA
    )
  )

# fit the logistic regression
mdl <- glm(outcome_binary ~ sex + age_group + vacc_status + muac_cat, data = prep_logit, family = "binomial")

# view coeff
gtsummary::tbl_regression(
  mdl,
  exp = TRUE,
  label = list(
    sex ~ "Gender",
    age_group ~ "Age groups",
    muac_cat ~ "MUAC category",
    vacc_status = "Vaccination status"
  ),
  intercept = TRUE,
  conf.int = TRUE
)
Characteristic OR1 95% CI1 p-value
(Intercept) 0.04 0.01, 0.11 <0.001
Gender


    female
    male 1.04 0.75, 1.45 0.8
Age groups


    15+ years
    < 6 months 5.67 2.01, 20.3 0.003
    6 - 8 months 3.57 1.33, 12.5 0.022
    9 - 11 months 3.11 1.13, 11.0 0.044
    1 - 4 years 1.99 0.79, 6.70 0.2
    5 - 14 years 0.74 0.23, 2.82 0.6
Vaccination status


    No
    Yes 0.34 0.18, 0.57 <0.001
MUAC category


    Green (125+ mm)
    Yellow (115 - 124 mm) 1.71 1.12, 2.57 0.011
    Red (<115 mm) 2.69 1.68, 4.21 <0.001
1 OR = Odds Ratio, CI = Confidence Interval

Time

dat |>
  mutate(
    epiweek = floor_date(date_onset, unit = "week")
  ) |>
  ggplot() +
  geom_bar(
    aes(
      x = epiweek,
      fill = epi_cat
    ),
    position = position_stack()
  ) +
  scale_x_date(
    breaks = "2 weeks",
    date_labels = "%Y -W%W"
  ) +
  scale_fill_manual(
    "Epi status",
    values = c(
      "confirmed" = "#912c2c",
      "probable" = "#c4833d",
      "suspected" = "#edd598"
    )
  ) +
  labs(
    x = "Epiweek",
    y = "N cases",
    title = glue::glue("Epicurve of measle outbreak in Southern Chad"),
    subtitle = glue::glue({
      "{nrow(dat)} cases observed from {min(dat$date_onset, na.rm = TRUE)} to {max(dat$date_onset, na.rm = TRUE)}"
    })
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, size = 5))

By sites

dat |>
  mutate(
    epiweek = floor_date(date_onset, unit = "week")
  ) |>
  ggplot() +
  geom_bar(
    aes(
      x = epiweek,
      fill = site
    ),
    position = position_stack()
  ) +
  scale_x_date(
    breaks = "4 weeks",
    date_labels = "%Y -W%W"
  ) +
  labs(
    x = "Epiweek",
    y = "N cases",
    title = glue::glue("Epicurve of measle outbreak in Southern Chad"),
    subtitle = glue::glue({
      "{nrow(dat)} cases observed from {min(dat$date_onset, na.rm = TRUE)} to {max(dat$date_onset, na.rm = TRUE)}"
    })
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, size = 5)) +
  facet_wrap(~site) +
  gghighlight::gghighlight()

Place

Make a Choropleth map

# clean admin data
dat <- dat |>
  mutate(across(c(sub_prefecture, region), ~ str_to_sentence(.x)))

# count cases by adm2

adm_summ <- dat |> summarise(
  .by = c(region, sub_prefecture),
  n_cases = n(),
  n_deaths = sum(outcome == "dead", na.rm = TRUE),
  cfr = round(digits = 3, n_deaths / sum(outcome %in% c("recovered", "dead", na.rm = TRUE))),
  cfr_lab = scales::percent(cfr, accuracy = .1)
)

# join the count data to the sf

chor_dat <- left_join(
  admin_2,
  adm_summ,
  by = c("adm2_name" = "sub_prefecture")
) |>
  # add AR using population data
  mutate(
    AR = round(digits = 3, n_cases / adm2_pop * 1000),
    label = (paste0(
      "<b>Region:</b> ",
      adm1_name,
      "<br><b>Sub-prefecture:</b> ",
      adm2_name,
      "<br><b>Population:</b> ",
      adm2_pop,
      "<br><b>Attack Rate:</b> ",
      AR,
      "<br><b>CFR (%):</b> ",
      cfr_lab
    ))
  )
leaf_basemap <- function(
  bbox,
  baseGroups = c("Light", "OSM", "OSM HOT"),
  overlayGroups = c("Boundaries"),
  miniMap = TRUE
) {
  lf <- leaflet::leaflet() %>%
    leaflet::fitBounds(bbox[["xmin"]], bbox[["ymin"]], bbox[["xmax"]], bbox[["ymax"]]) %>%
    leaflet::addMapPane(name = "choropleth", zIndex = 310) %>%
    leaflet::addMapPane(name = "place_labels", zIndex = 320) %>%
    leaflet::addMapPane(name = "circles", zIndex = 410) %>%
    leaflet::addMapPane(name = "boundaries", zIndex = 420) %>%
    leaflet::addMapPane(name = "geo_highlight", zIndex = 430) %>%
    leaflet::addProviderTiles("CartoDB.PositronNoLabels", group = "Light") %>%
    leaflet::addProviderTiles(
      "CartoDB.PositronOnlyLabels",
      group = "Light",
      options = leaflet::leafletOptions(pane = "place_labels")
    ) %>%
    leaflet::addProviderTiles("OpenStreetMap", group = "OSM") %>%
    leaflet::addProviderTiles("OpenStreetMap.HOT", group = "OSM HOT") %>%
    leaflet::addScaleBar(
      position = "bottomright",
      options = leaflet::scaleBarOptions(imperial = FALSE)
    ) %>%
    leaflet::addLayersControl(
      baseGroups = baseGroups,
      overlayGroups = overlayGroups,
      position = "topleft"
    )

  if (miniMap) {
    lf <- lf %>% leaflet::addMiniMap(toggleDisplay = TRUE, position = "bottomleft")
  }

  return(lf)
}

bbox <- st_bbox(filter(admin_2, adm1_name == "Mandoul"))
bins <- c(0, 1, 5, 10, 20, Inf)
pal <- leaflet::colorBin("YlOrRd", domain = chor_dat$AR, bins = bins)
labels <- chor_dat$label |> lapply(htmltools::HTML)

leaflet::leaflet() |>
  leaf_basemap(bbox, miniMap = TRUE) |>
  leaflet::fitBounds(as.character(bbox)[1], as.character(bbox)[2], as.character(bbox)[3], as.character(bbox)[4]) |>
  leaflet::addProviderTiles("CartoDB.Positron", group = "Light") |>
  leaflet::addScaleBar(position = "bottomright", options = leaflet::scaleBarOptions(imperial = FALSE)) |>
  leaflet.extras::addFullscreenControl(position = "topleft") |>
  leaflet.extras::addResetMapButton() |>
  leaflet::addPolygons(
    data = admin_1,
    stroke = TRUE,
    weight = 1.5,
    color = "black",
    fill = FALSE,
    fillOpacity = 0
  ) |>
  leaflet::addPolygons(
    data = chor_dat,
    label = ~labels,
    stroke = TRUE,
    weight = 1.2,
    color = "grey",
    fillColor = ~ pal(AR),
    fillOpacity = 0.3
  )